library(brms)
library(data.table)

# SET WORKING DIRECTORY
setwd("C:/Users/drmil/Dropbox/White House Lobbying/3YP/APSR Final/Replication Archive/visitor_logs_analyses")

################################################################################

# FIGURE 3: CLINTON ENGAGEMENT LOGIT

# READING IN DATA FILE TO OBTAIN VARIABLE VALUES FOR PREDICTIONS
clinton_logs_analysis <- fread("clinton_logs_analysis.csv", header = TRUE, 
                               stringsAsFactors = FALSE)

load("tableSI6_col1.RData")

# CREATING DATA FRAME FOR PREDICTIONS.  EACH ROW IS ONE OF THE INTEREST-TIME
# PERIOD OBSERVATIONS IN THE ORIGINAL ANALYSIS.  RESOURCES/PARTISAN ALIGNMENT
# ARE SET TO VALUES OF INTEREST AS INDICATED IN FIGURE 3, ALL OTHER VARIABLES
# ARE SET TO THEIR OBSERVED VALUES.  

newdata <- data.frame(lobbyexp_1l=c(rep(quantile(clinton_logs_analysis$lobbyexp_1l, 
                                                    probs = c(0.25), na.rm = TRUE), 
                                           dim(clinton_logs_analysis)[1]),
                                       rep(quantile(clinton_logs_analysis$lobbyexp_1l, 
                                                    probs = c(0.50), na.rm = TRUE), 
                                           dim(clinton_logs_analysis)[1]),
                                       rep(quantile(clinton_logs_analysis$lobbyexp_1l, 
                                                    probs = c(0.75), na.rm = TRUE), 
                                           dim(clinton_logs_analysis)[1]),
                                       rep(clinton_logs_analysis$lobbyexp_1l, 7)),
                      inhouse_lobby=clinton_logs_analysis$inhouse_lobby,
                      any_visits_leq0_1l=clinton_logs_analysis$any_visits_leq0_1l, 
                      industry_pid=c(rep(clinton_logs_analysis$industry_pid, 7), 
                                          rep("I", dim(clinton_logs_analysis)[1]), 
                                          rep("D", dim(clinton_logs_analysis)[1]), 
                                          rep("R", dim(clinton_logs_analysis)[1])),
                      made_donations_l1tol4=c(rep(clinton_logs_analysis$made_donations_l1tol4,3), 
                                       rep(0,dim(clinton_logs_analysis)[1]), 
                                       rep(1,dim(clinton_logs_analysis)[1]*3),
                                       rep(clinton_logs_analysis$made_donations_l1tol4,3)), 
                      amt_donations_1ltol4=c(rep(clinton_logs_analysis$amt_donations_1ltol4,3), 
                                                rep(0, dim(clinton_logs_analysis)[1]),
                                                rep(quantile(clinton_logs_analysis$amt_donations_1ltol4[which(clinton_logs_analysis$made_donations_l1tol4==1)], 
                                                             probs = c(0.25), 
                                                             na.rm = TRUE), dim(clinton_logs_analysis)[1]),
                                                rep(quantile(clinton_logs_analysis$amt_donations_1ltol4[which(clinton_logs_analysis$made_donations_l1tol4==1)], 
                                                             probs = c(0.50), 
                                                             na.rm = TRUE), dim(clinton_logs_analysis)[1]),
                                                rep(quantile(clinton_logs_analysis$amt_donations_1ltol4[which(clinton_logs_analysis$made_donations_l1tol4==1)], 
                                                             probs = c(0.75), 
                                                             na.rm = TRUE), dim(clinton_logs_analysis)[1]),
                                                rep(clinton_logs_analysis$amt_donations_1ltol4,3)),
                      ACC=clinton_logs_analysis$ACC,
                      ADV=clinton_logs_analysis$ADV,
                      AER=clinton_logs_analysis$AER,
                      AGR=clinton_logs_analysis$AGR,
                      ALC=clinton_logs_analysis$ALC,
                      ANI=clinton_logs_analysis$ANI,
                      APP=clinton_logs_analysis$APP,
                      ART=clinton_logs_analysis$ART,
                      AUT=clinton_logs_analysis$AUT,
                      AVI=clinton_logs_analysis$AVI,
                      BAN=clinton_logs_analysis$BAN,
                      BEV=clinton_logs_analysis$BEV,
                      BNK=clinton_logs_analysis$BNK,
                      BUD=clinton_logs_analysis$BUD,
                      CAW=clinton_logs_analysis$CAW,
                      CDT=clinton_logs_analysis$CDT,
                      CHM=clinton_logs_analysis$CHM,
                      CIV=clinton_logs_analysis$CIV,
                      COM=clinton_logs_analysis$COM,
                      CON=clinton_logs_analysis$CON,
                      CPI=clinton_logs_analysis$CPI,
                      CPT=clinton_logs_analysis$CPT,
                      CSP=clinton_logs_analysis$CSP,
                      DEF=clinton_logs_analysis$DEF,
                      DIS=clinton_logs_analysis$DIS,
                      DOC=clinton_logs_analysis$DOC,
                      ECN=clinton_logs_analysis$ECN,
                      EDU=clinton_logs_analysis$EDU,
                      ENG=clinton_logs_analysis$ENG,
                      ENV=clinton_logs_analysis$ENV,
                      FAM=clinton_logs_analysis$FAM,
                      FIN=clinton_logs_analysis$FIN,
                      FIR=clinton_logs_analysis$FIR,
                      FOO=clinton_logs_analysis$FOO,
                      FOR=clinton_logs_analysis$FOR,
                      FUE=clinton_logs_analysis$FUE,
                      GAM=clinton_logs_analysis$GAM,
                      GOV=clinton_logs_analysis$GOV,
                      HCR=clinton_logs_analysis$HCR,
                      HOU=clinton_logs_analysis$HOU,
                      IMM=clinton_logs_analysis$IMM,
                      IND=clinton_logs_analysis$IND,
                      INS=clinton_logs_analysis$INS,
                      LAW=clinton_logs_analysis$LAW,
                      LBR=clinton_logs_analysis$LBR,
                      MAN=clinton_logs_analysis$MAN,
                      MAR=clinton_logs_analysis$MAR,
                      MED=clinton_logs_analysis$MED,
                      MIA=clinton_logs_analysis$MIA,
                      MMM=clinton_logs_analysis$MMM,
                      MON=clinton_logs_analysis$MON,
                      NAT=clinton_logs_analysis$NAT,
                      PHA=clinton_logs_analysis$PHA,
                      POS=clinton_logs_analysis$POS,
                      REL=clinton_logs_analysis$REL,
                      RES=clinton_logs_analysis$RES,
                      RET=clinton_logs_analysis$RET,
                      ROD=clinton_logs_analysis$ROD,
                      RRR=clinton_logs_analysis$RRR,
                      SCI=clinton_logs_analysis$SCI,
                      SMB=clinton_logs_analysis$SMB,
                      SPO=clinton_logs_analysis$SPO,
                      TAX=clinton_logs_analysis$TAX,
                      TEC=clinton_logs_analysis$TEC,
                      TOB=clinton_logs_analysis$TOB,
                      TOR=clinton_logs_analysis$TOR,
                      TOU=clinton_logs_analysis$TOU,
                      TRA=clinton_logs_analysis$TRA,
                      TRD=clinton_logs_analysis$TRD,
                      TRU=clinton_logs_analysis$TRU,
                      UNM=clinton_logs_analysis$UNM,
                      URB=clinton_logs_analysis$URB,
                      UTI=clinton_logs_analysis$UTI,
                      VET=clinton_logs_analysis$VET,
                      WAS=clinton_logs_analysis$WAS,
                      WEL=clinton_logs_analysis$WEL)

# CALCULATING PREDICTED VALUES. AS NOTED IN SI.37, I DO NOT UTILIZE VARYING
# INTERCEPTS IN THIS CALCULATION (I.E., re_formula = NA); INCLUDING THESE
# VARYING INTERCEPTS DOES NOT AFFECT THE SUBSTANTIVE CONCLUSIONS DRAWN FROM
# THE PREDICTED VALUES

predicted_values <- as.data.frame(fitted(tableSI6_col1, newdata, summary = FALSE, 
                                      re_formula = NA))

# EXTRACTING THE PREDICTED VALUES THAT CORRESPOND WITH EACH RESOURCES/PARTISAN
# ALIGNMENT SETTING

predicted_values_25exp <- predicted_values[,1:31673]
predicted_values_50exp <- predicted_values[,31674:63346]
predicted_values_75exp <- predicted_values[,63347:95019]
predicted_values_0contrib <- predicted_values[,95020:126692]
predicted_values_25contrib <- predicted_values[,126693:158365]
predicted_values_50contrib <- predicted_values[,158366:190038]
predicted_values_75contrib <- predicted_values[,190039:221711]
predicted_values_I <- predicted_values[,221712:253384]
predicted_values_D <- predicted_values[,253385:285057]
predicted_values_R <- predicted_values[,285058:316730]

# THE OBSERVED VALUE APPROACH PROPOSED BY HANMER AND KALKAN (2013) UTILIZES THE
# MEAN OF PREDICTED VALUES CALCULATED FOR ALL OBSERVATIONS IN EACH SAMPLING
# ITERATION TO CREATE A DISTRIBUTION OF THE MEAN PREDICTED VALUE ACROSS SAMPLES.
# THE FOLLOWING CODE CALCULATES THESE MEAN PREDICTED VALUES AND BINDS THEM 
# TOGETHER IN A MATRIX WHERE EACH COLUMN IS THE DISTRIBUTION OF MEAN PREDICTED 
# VALUES.

predicted_values_25exp$mean <- rowMeans(predicted_values_25exp)
predicted_values_50exp$mean <- rowMeans(predicted_values_50exp)
predicted_values_75exp$mean <- rowMeans(predicted_values_75exp)
predicted_values_0contrib$mean <- rowMeans(predicted_values_0contrib)
predicted_values_25contrib$mean <- rowMeans(predicted_values_25contrib)
predicted_values_50contrib$mean <- rowMeans(predicted_values_50contrib)
predicted_values_75contrib$mean <- rowMeans(predicted_values_75contrib)
predicted_values_I$mean <- rowMeans(predicted_values_I)
predicted_values_D$mean <- rowMeans(predicted_values_D)
predicted_values_R$mean <- rowMeans(predicted_values_R)

predicted_dists <- cbind(predicted_values_25exp$mean, predicted_values_50exp$mean, 
                         predicted_values_75exp$mean, predicted_values_0contrib$mean, 
                         predicted_values_25contrib$mean, predicted_values_50contrib$mean,
                         predicted_values_75contrib$mean, predicted_values_I$mean,
                         predicted_values_D$mean, predicted_values_R$mean)

# TO DETERMINE THE MEAN AND 95% CREDIBLE INTERVALS OF THESE DISTRIBUTIONS, WE
# TAKE THE MEAN AND 0.025/0.975 QUANTILE VALUES OF EACH COLUMN.

pes <- apply(predicted_dists, MARGIN = 2, FUN = function(x) mean(x))
ci_25 <-apply(predicted_dists, MARGIN = 2, FUN = function(x) quantile(x, probs = c(0.025)))
ci_975 <-apply(predicted_dists, MARGIN = 2, FUN = function(x) quantile(x, probs = c(0.975)))

# TO DETERMINE WHETHER THESE DISTRIBUTIONS OF PREDICTED VALUES ARE
# DISTINGUISHABLE FROM OTHER ANOTHER, WE ALSO NEED TO CALCULATE THE DIFFERENCES
# IN THE DISTRIBUTIONS WE WANT TO COMPARE AND OBTAIN THE MEAN AND 0.025/0.975 
# QUANTILE VALUES.  THE FOLLOWING CODE DOES THIS FOR EACH OF THE COMPARISONS
# MADE IN THE RIGHT PANE OF FIGURE 3.

diff_quants <- data.frame("pe"=numeric(0), "ci_25"=numeric(0), "ci_975"=numeric(0))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN EXPENDITURES ARE SET TO THE FIRST
# QUARTILE VALUE VS. THE SECOND QUARTILE VALUE
predicted_diff_1st_to_2nd_exp <- predicted_dists[,2] - predicted_dists[,1]

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_1st_to_2nd_exp), 
                                             "ci_25"=quantile(predicted_diff_1st_to_2nd_exp, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_1st_to_2nd_exp, probs = c(0.975))))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN EXPENDITURES ARE SET TO THE SECOND
# QUARTILE VALUE VS. THE THIRD QUARTILE VALUE
predicted_diff_2nd_to_3rd_exp <- predicted_dists[,3] - predicted_dists[,2]

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_2nd_to_3rd_exp), 
                                             "ci_25"=quantile(predicted_diff_2nd_to_3rd_exp, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_2nd_to_3rd_exp, probs = c(0.975))))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN CONTRIBUTIONS ARE SET TO NONE VS.
# THE FIRST QUARTILE VALUE
predicted_diff_0_to_1st_contrib <- predicted_dists[,5] - predicted_dists[,4]

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_0_to_1st_contrib), 
                                             "ci_25"=quantile(predicted_diff_0_to_1st_contrib, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_0_to_1st_contrib, probs = c(0.975))))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN CONTRIBUTIONS ARE SET TO THE FIRST 
# QUARTILE VALUE VS. THE SECOND QUARTILE VALUE
predicted_diff_1st_to_2nd_contrib <- predicted_dists[,6] - predicted_dists[,5]

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_1st_to_2nd_contrib), 
                                             "ci_25"=quantile(predicted_diff_1st_to_2nd_contrib, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_1st_to_2nd_contrib, probs = c(0.975))))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN CONTRIBUTIONS ARE SET TO THE SECOND 
# QUARTILE VALUE VS. THE THIRD QUARTILE VALUE
predicted_diff_2nd_to_3rd_contrib <- predicted_dists[,7] - predicted_dists[,6]

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_2nd_to_3rd_contrib), 
                                             "ci_25"=quantile(predicted_diff_2nd_to_3rd_contrib, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_2nd_to_3rd_contrib, probs = c(0.975))))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN PARTISAN ALIGNMENT IS SET TO 
# INDEPENDENT VS. DEMOCRATIC
predicted_diff_I_to_D <- predicted_dists[,9] - predicted_dists[,8]

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_I_to_D), 
                                             "ci_25"=quantile(predicted_diff_I_to_D, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_I_to_D, probs = c(0.975))))

# DIFFERENCE BETWEEN PREDICTED VALUES WHEN PARTISAN ALIGNMENT IS SET TO 
# REPUBLICAN VS. DEMOCRATIC
predicted_diff_R_to_D <- predicted_dists[,9] - predicted_dists[,10]

diff_quants <- rbind(diff_quants, data.frame("pe"=mean(predicted_diff_R_to_D), 
                                             "ci_25"=quantile(predicted_diff_R_to_D, probs = c(0.025)), 
                                             "ci_975"=quantile(predicted_diff_R_to_D, probs = c(0.975))))

# RECREATING FIGURE 3; THIS CODE CREATES THE LEFT AND RIGHT PANES OF THE FIGURE
# SEPARATELY

pdf(file = "fig3_left.pdf", family = "Times", height = 9, width = 7)
par(mar=c(5.1,18,2,.7))
plot(NULL, ylim=c(0.5, 13), xlim=c(0,0.75), axes=FALSE,  bg = "black",
     tck=-.02, cex.axis=0.9, cex=1.5,
     xlab="", ylab="", yaxt="n", xaxt="n", main="")
points(pes,
       c(12, 11, 10, 8, 7, 6, 5, 2, 3, 1),
       pch=19, cex=1.5)
segments(x0=ci_25,
         x1=ci_975,
         y0=c(12, 11, 10, 8, 7, 6, 5, 2, 3, 1), cex=1.25)
axis(1,at=c(0.00, 0.25, 0.50, 0.75),
     labels=sprintf("%.2f",c(0.00, 0.25, 0.50, 0.75)),cex.axis = 1.75, lwd=2)
axis(2,at=c(12, 11, 10, 8, 7, 6, 5, 2, 3, 1),
     las=2,
     labels=c("1st Quartile (<$10,000)", "2nd Quartile ($20,000)", "3rd Quartile ($80,000)",
              "None", "1st Quartile ($20,375)", "2nd Quartile ($66,025)", 
              "3rd Quartile ($196,954)",
              "Independent", "Democratic", "Republican"),
     tck=0,
     lwd =0,
     line = 0,
     cex.axis=1.5)
axis(2,at=c(12.75,8.75,3.75),
     las=2,
     labels=c(expression(~bold(~underline("Lobbying Expenditures"))), 
              expression(~bold(~underline("Campaign Contributions"))), 
              expression(~bold(~underline("Partisan Alignment"))) 
     ),
     tck=0,
     lwd = 0,
     line = 0,
     cex.axis=1.75)
mtext("Pr(Engagement)", side=1, line = 3, cex =2.25)
#mtext("Clinton", side = 3, line=1, cex = 3.5)
text(pes,
     c(12, 11, 10, 8, 7, 6, 5, 2, 3, 1)+0.3,
     sprintf("%.2f",round(pes, 2)), cex=1.5)
dev.off()

pdf(file = "fig3_right.pdf", family = "Times", height = 9, width = 7)
par(mar=c(5.1,7,2,.7))
plot(NULL, ylim=c(0.5, 13), xlim=c(-0.05,0.15), axes=FALSE,  bg = "black",
     tck=-.02, cex.axis=0.9, cex=1.5,
     xlab="", ylab="", yaxt="n", xaxt="n", main="")
abline(v=0, lty=2, col="gray")
points(diff_quants$pe,
       c(11, 10, 7, 6, 5, 2, 1),
       pch=19, cex=1.5)
segments(x0=diff_quants$ci_25,
         x1=diff_quants$ci_975,
         y0=c(11, 10, 7, 6, 5, 2, 1), cex=1.25)
axis(1,at=c(-0.05, 0.00, 0.05, 0.10, 0.15),
     labels=c(sprintf("%.2f",c(-0.05, 0.00, 0.05, 0.10, 0.15))),cex.axis = 1.75, lwd=2)
axis(2,at=c(11, 10, 7, 6, 5, 2, 1),
     las=2,
     labels=c("Q2-Q1", "Q3-Q2",
              "Q1-None", "Q2-Q1", "Q3-Q2",
              "Dem.-Ind.", "Dem.-Rep."),
     tck=0,
     lwd =0,
     line = 0,
     cex.axis=1.5)
mtext("Difference in Pr(Engagement)", side=1, line = 3, cex =2.25)
text(diff_quants$pe,
     c(11, 10, 7, 6, 5, 2, 1)+0.3,
     sprintf("%.2f",round(diff_quants$pe, 2)), cex=1.5)
dev.off()